Introduction

In this report, the code and results are summarised for the tasks in Homework 3.

First, we load the consumption data and mark 2016-03-27 as missing, since that day lacks one hour due to the daylight saving time change. After that, we look at a general summary of the data.

library(kableExtra)
library(data.table)
library(glmnet)
library(CVXR)
library(tidyverse)

data_path = "/home/burak/Desktop/Lectures/IE582/HW3/Data/TurkiyeCons.csv"
data = fread(data_path)
colnames(data)<-c("Date","Hour","Consumption")
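# Consumption uses Turkish number formatting: "." as thousands separator, "," as decimal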
data$Consumption = gsub(".","",data$Consumption,fixed = T)
data$Consumption = as.numeric(gsub(",",".",data$Consumption,fixed = T))
data_clean = data%>%transmute(Date=lubridate::dmy(Date),Hour = as.numeric(substr(Hour,1,2)),Consumption)

summary(data_clean)
##       Date                 Hour        Consumption   
##  Min.   :2016-01-01   Min.   : 0.00   Min.   :    0  
##  1st Qu.:2017-03-24   1st Qu.: 5.75   1st Qu.:28835  
##  Median :2018-06-16   Median :11.50   Median :32929  
##  Mean   :2018-06-16   Mean   :11.50   Mean   :32759  
##  3rd Qu.:2019-09-08   3rd Qu.:17.25   3rd Qu.:36350  
##  Max.   :2020-11-30   Max.   :23.00   Max.   :47062
data_clean%>%filter(Consumption==0)
##         Date Hour Consumption
## 1 2016-03-27    2           0
data_clean%>%group_by(Date)%>%summarise(n=length(unique(Hour)))%>%ungroup()%>%filter(n!=24)
## # A tibble: 1 x 2
##   Date           n
##   <date>     <int>
## 1 2016-03-27    23
# Set 2016-03-27 to NA because of the daylight saving time change
setDT(data_clean)
data_clean[Date=="2016-03-27",Hour:=c(0:23)]
data_clean[Date=="2016-03-27",Consumption:=NA]

summary(data_clean)
##       Date                 Hour        Consumption   
##  Min.   :2016-01-01   Min.   : 0.00   Min.   :15333  
##  1st Qu.:2017-03-24   1st Qu.: 5.75   1st Qu.:28840  
##  Median :2018-06-16   Median :11.50   Median :32932  
##  Mean   :2018-06-16   Mean   :11.50   Mean   :32763  
##  3rd Qu.:2019-09-08   3rd Qu.:17.25   3rd Qu.:36352  
##  Max.   :2020-11-30   Max.   :23.00   Max.   :47062  
##                                       NA's   :24
ggplot(data_clean)+geom_line(aes(x=lubridate::as_datetime(Date)+lubridate::hours(Hour),y=Consumption))+
  theme_bw()+labs(x="DateTime")

a)

In this part, we analyse the results of the naive predictions lag48 and lag168, i.e. the consumption 48 and 168 hours earlier. For all tasks, we use the data up to 2020-10-31 as the training period and the rest as the test period.

test_start = lubridate::as_date("2020-11-01")

naive_pred = data_clean%>%arrange(Date,Hour)%>%mutate(naive_lag48=lag(Consumption,48),
                                         naive_lag168=lag(Consumption,168))%>%filter(Date>=test_start)

naive_final = melt(naive_pred,1:3,variable.name="Model",value.name="Prediction")

naive_mape = naive_final%>%group_by(Date,Model)%>%summarise(Mape=mean(abs(Prediction-Consumption)/Consumption))%>%ungroup()

naive_mape_general = naive_mape%>%group_by(Model)%>%summarise(TestStart=min(Date),TestEnd=max(Date),
                                                               NDay=length(unique(Date)),
                                                               Min_Mape=min(Mape),Q25_Mape=quantile(Mape,0.25),
                                                               Median_Mape=median(Mape),Avg_Mape=mean(Mape),
                                                               Q75_Mape=quantile(Mape,0.75),Max_Mape=max(Mape))%>%ungroup()

p1 = ggplot(naive_final)+geom_line(aes(x=lubridate::as_datetime(Date)+lubridate::hours(Hour),y=Consumption,col="Actual"))+
  geom_line(aes(x=lubridate::as_datetime(Date)+lubridate::hours(Hour),y=Prediction,col=Model))+
  theme_bw()+labs(x="DateTime")

p2 = ggplot(naive_mape)+geom_line(aes(x=Date,y=Mape*100,col=Model))+
  theme_bw()+labs(x="Date",y="MAPE (%)")

gridExtra::grid.arrange(p1,p2,nrow=2)

naive_mape_general %>%
kbl() %>%
  kable_classic(full_width = F, html_font = "Cambria")
Model TestStart TestEnd NDay Min_Mape Q25_Mape Median_Mape Avg_Mape Q75_Mape Max_Mape
naive_lag48 2020-11-01 2020-11-30 30 0.0074837 0.0261611 0.0586450 0.0778920 0.1453798 0.2038553
naive_lag168 2020-11-01 2020-11-30 30 0.0112858 0.0186489 0.0292689 0.0345367 0.0374015 0.1234274

We can observe that lag168 performs better than lag48. This is likely due to the different characteristics of the days of the week: weekends behave differently from working days, and lag168 always refers to the same day of the previous week, while lag48 can cross between day types.
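As a quick check of this claim, the daily MAPE can be split by day type (a sketch using the naive_mape table computed above; the weekday/weekend split is illustrative):

naive_mape %>%
  mutate(DayType = ifelse(lubridate::wday(Date, week_start = 1) >= 6, "Weekend", "Weekday")) %>%
  group_by(Model, DayType) %>%
  summarise(Avg_Mape = mean(Mape)) %>%
  ungroup()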

b)

In this part, we build a linear regression model on the long-format data including all hours, with lag48 and lag168 as features.

lm_long_data = data_clean%>%arrange(Date,Hour)%>%mutate(lag48=lag(Consumption,48),
                                                      lag168=lag(Consumption,168))
lm_long_train = lm_long_data%>%filter(Date<test_start)%>%na.omit()
lm_long_test = lm_long_data%>%filter(Date>=test_start)
lm_long_fit = lm(data = lm_long_train,Consumption~lag48+lag168)
summary(lm_long_fit)
## 
## Call:
## lm(formula = Consumption ~ lag48 + lag168, data = lm_long_train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17125.7   -984.3     -1.2   1015.7  16097.4 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.570e+03  8.351e+01   18.80   <2e-16 ***
## lag48       3.091e-01  3.096e-03   99.82   <2e-16 ***
## lag168      6.430e-01  3.094e-03  207.83   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2388 on 42141 degrees of freedom
## Multiple R-squared:  0.7757, Adjusted R-squared:  0.7757 
## F-statistic: 7.288e+04 on 2 and 42141 DF,  p-value: < 2.2e-16
lm_long_test$linear_regression_long <- predict(lm_long_fit,lm_long_test)

lm_long_final = melt(lm_long_test%>%select(Date,Hour,Consumption,linear_regression_long),1:3,variable.name="Model",value.name="Prediction")

lm_long_mape = lm_long_final%>%group_by(Date,Model)%>%summarise(Mape=mean(abs(Prediction-Consumption)/Consumption))%>%ungroup()

lm_long_mape_general = lm_long_mape%>%group_by(Model)%>%summarise(TestStart=min(Date),TestEnd=max(Date),
                                                              NDay=length(unique(Date)),
                                                              Min_Mape=min(Mape),Q25_Mape=quantile(Mape,0.25),
                                                              Median_Mape=median(Mape),Avg_Mape=mean(Mape),
                                                              Q75_Mape=quantile(Mape,0.75),Max_Mape=max(Mape))%>%ungroup()

p1 = ggplot(lm_long_final)+geom_line(aes(x=lubridate::as_datetime(Date)+lubridate::hours(Hour),y=Consumption,col="Actual"))+
  geom_line(aes(x=lubridate::as_datetime(Date)+lubridate::hours(Hour),y=Prediction,col=Model))+
  theme_bw()+labs(x="DateTime")

p2 = ggplot(lm_long_mape)+geom_line(aes(x=Date,y=Mape*100,col=Model))+
  theme_bw()+labs(x="Date",y="MAPE (%)")

gridExtra::grid.arrange(p1,p2,nrow=2)

lm_long_mape_general %>%
kbl() %>%
  kable_classic(full_width = F, html_font = "Cambria")
Model TestStart TestEnd NDay Min_Mape Q25_Mape Median_Mape Avg_Mape Q75_Mape Max_Mape
linear_regression_long 2020-11-01 2020-11-30 30 0.0121335 0.024273 0.0406424 0.0412358 0.0551997 0.0889939

We observe that both lag48 and lag168 are significant in the regression. The overall performance lies between the two naive methods.

c)

In this part, because of the hourly seasonality, we build a separate linear regression for each hour and then aggregate the results to obtain the overall performance.

lm_hourly_test=tibble()
for(hour in 0:23){
  temp_train = lm_long_data%>%filter(Date<test_start)%>%filter(Hour==hour)%>%na.omit()
  temp_test = lm_long_data%>%filter(Date>=test_start)%>%filter(Hour==hour)
  temp_fit = lm(data = temp_train,Consumption~lag48+lag168)
  temp_test$linear_regression_hourly <- predict(temp_fit,temp_test)
  lm_hourly_test=rbind(lm_hourly_test,temp_test)  
}
lm_hourly_final = melt(lm_hourly_test%>%select(Date,Hour,Consumption,linear_regression_hourly),1:3,variable.name="Model",value.name="Prediction")

lm_hourly_mape = lm_hourly_final%>%group_by(Date,Model)%>%summarise(Mape=mean(abs(Prediction-Consumption)/Consumption))%>%ungroup()

lm_hourly_mape_general = lm_hourly_mape%>%group_by(Model)%>%summarise(TestStart=min(Date),TestEnd=max(Date),
                                                                  NDay=length(unique(Date)),
                                                                  Min_Mape=min(Mape),Q25_Mape=quantile(Mape,0.25),
                                                                  Median_Mape=median(Mape),Avg_Mape=mean(Mape),
                                                                  Q75_Mape=quantile(Mape,0.75),Max_Mape=max(Mape))%>%ungroup()

p1 = ggplot(lm_hourly_final)+geom_line(aes(x=lubridate::as_datetime(Date)+lubridate::hours(Hour),y=Consumption,col="Actual"))+
  geom_line(aes(x=lubridate::as_datetime(Date)+lubridate::hours(Hour),y=Prediction,col=Model))+
  theme_bw()+labs(x="DateTime")

p2 = ggplot(lm_hourly_mape)+geom_line(aes(x=Date,y=Mape*100,col=Model))+
  theme_bw()+labs(x="Date",y="MAPE (%)")

gridExtra::grid.arrange(p1,p2,nrow=2)

lm_hourly_mape_general %>%
kbl() %>%
  kable_classic(full_width = F, html_font = "Cambria")
Model TestStart TestEnd NDay Min_Mape Q25_Mape Median_Mape Avg_Mape Q75_Mape Max_Mape
linear_regression_hourly 2020-11-01 2020-11-30 30 0.0095117 0.0252687 0.0443367 0.0424977 0.0541299 0.0850646

According to the performance results, modelling each hour separately does not significantly improve performance for linear regression.

d)

In this part, we apply lasso regression (an L1 penalty) using cv.glmnet. We pass alpha = 1 because alpha = 0 gives ridge regression and alpha = 1 gives lasso. We again model each hour separately and, for each hour, use the wide feature set containing all 24 lag48 and all 24 lag168 values.

We also analyse the coefficients and lambda values of the models.
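For reference, the objective that glmnet minimizes for the gaussian family, as given in its documentation, can be sketched as follows (comments only, not meant to be executed):

# glmnet (gaussian) objective:
#   (1/(2*n)) * sum((y - X %*% beta)^2) +
#     lambda * ( (1 - alpha)/2 * sum(beta^2) + alpha * sum(abs(beta)) )
# alpha = 1 keeps only lambda * sum(abs(beta))  -> lasso (L1)
# alpha = 0 keeps only (lambda/2) * sum(beta^2) -> ridge (L2)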

feature_matrix = dcast(as.data.table(lm_long_data%>%na.omit()),Date~Hour,value.var=c("lag48","lag168"))
train_matrix = feature_matrix%>%filter(Date<test_start)
test_matrix = feature_matrix%>%filter(Date>=test_start)

train_dates = train_matrix$Date
train_feature = as.matrix(train_matrix%>%select(-Date))
test_feature = as.matrix(test_matrix%>%select(-Date))

par(mfrow=c(4,2))
full_summary_glmnet = tibble()
glmnet_lasso_hourly_final = tibble()
for(hour in 0:23){
  train_target = (lm_long_data%>%filter(Hour==hour&Date%in%train_dates))$Consumption  
  set.seed(1)
  temp_fit = cv.glmnet(x = train_feature,y=train_target,nfolds = 10,alpha=1)
  plot(temp_fit)
  plot(temp_fit$glmnet.fit)
  
  temp_pred = data.table(Date=test_matrix$Date,Hour=hour,glmnet_lasso_hourly_lambdamin=predict(temp_fit,test_feature,s=c("lambda.min"))[,1],
             glmnet_lasso_hourly_lambda1se=predict(temp_fit,test_feature,s=c("lambda.1se"))[,1])

  glmnet_lasso_hourly_final = rbind(glmnet_lasso_hourly_final,temp_pred)
  
  full_summary_glmnet=full_summary_glmnet%>%
    bind_rows(data.table(variable=names(as.matrix(coef(temp_fit))[,1]),coefficent=as.matrix(coef(temp_fit))[,1],
       model=paste0("model_for_hour_",hour),lambda1se=temp_fit$lambda.1se,lambdamin=temp_fit$lambda.min)
  )
}

par(mfrow=c(1,1))

full_summary_glmnet=full_summary_glmnet%>%group_by(model)%>%mutate(coef_order=row_number())%>%ungroup()%>%
  group_by(variable)%>%mutate(model_order=row_number())%>%ungroup()
ggplot(full_summary_glmnet%>%filter(variable!="(Intercept)"))+geom_tile(aes(x=reorder(variable,coef_order),y=reorder(model,model_order),fill=coefficent))+
  theme_bw()+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

ggplot(full_summary_glmnet%>%select(model,lambda1se,lambdamin,model_order)%>%unique())+
  geom_bar(aes(x=reorder(model,model_order),y=lambda1se,fill="lambda1se"),stat = "identity")+theme_bw()+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

ggplot(full_summary_glmnet%>%select(model,lambda1se,lambdamin,model_order)%>%unique())+
  geom_bar(aes(x=reorder(model,model_order),y=lambdamin,fill="lambdamin"),stat = "identity")+theme_bw()+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

glmnet_lasso_hourly_final=melt(glmnet_lasso_hourly_final,1:2,variable.name="Model",value.name="Prediction")%>%left_join(lm_long_data%>%select(Date,Hour,Consumption),by=c("Date","Hour"))

glmnet_lasso_hourly_mape = glmnet_lasso_hourly_final%>%group_by(Date,Model)%>%summarise(Mape=mean(abs(Prediction-Consumption)/Consumption))%>%ungroup()

glmnet_lasso_hourly_mape_general = glmnet_lasso_hourly_mape%>%group_by(Model)%>%summarise(TestStart=min(Date),TestEnd=max(Date),
                                                                      NDay=length(unique(Date)),
                                                                      Min_Mape=min(Mape),Q25_Mape=quantile(Mape,0.25),
                                                                      Median_Mape=median(Mape),Avg_Mape=mean(Mape),
                                                                      Q75_Mape=quantile(Mape,0.75),Max_Mape=max(Mape))%>%ungroup()

p1 = ggplot(glmnet_lasso_hourly_final)+geom_line(aes(x=lubridate::as_datetime(Date)+lubridate::hours(Hour),y=Consumption,col="Actual"))+
  geom_line(aes(x=lubridate::as_datetime(Date)+lubridate::hours(Hour),y=Prediction,col=Model))+
  theme_bw()+labs(x="DateTime")

p2 = ggplot(glmnet_lasso_hourly_mape)+geom_line(aes(x=Date,y=Mape*100,col=Model))+
  theme_bw()+labs(x="Date",y="MAPE (%)")

gridExtra::grid.arrange(p1,p2,nrow=2)

glmnet_lasso_hourly_mape_general %>%
kbl() %>%
  kable_classic(full_width = F, html_font = "Cambria")
Model TestStart TestEnd NDay Min_Mape Q25_Mape Median_Mape Avg_Mape Q75_Mape Max_Mape
glmnet_lasso_hourly_lambdamin 2020-11-01 2020-11-30 30 0.0079057 0.0153607 0.0187656 0.0243023 0.0255123 0.0801177
glmnet_lasso_hourly_lambda1se 2020-11-01 2020-11-30 30 0.0069945 0.0146829 0.0194586 0.0241893 0.0277244 0.0651066

We observe that:

  • The models of consecutive hours are similar in terms of feature coefficients. This is expected: consecutive hours are highly correlated, so similar coefficients for the feature set are plausible.
  • The lambda.1se values are also similar for consecutive models.
  • Some features take a positive coefficient in all models.
  • lambda.min and lambda.1se give close results, so lambda.1se is preferable since it yields a sparser model (see the sketch after this list).
  • Performance improves significantly compared to the previous tasks.
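A minimal sketch of this sparsity argument, using temp_fit (which after the loop above holds the model fitted for hour 23):

sum(coef(temp_fit, s = "lambda.min")[-1] != 0) # nonzero coefficients at lambda.min (denser model)
sum(coef(temp_fit, s = "lambda.1se")[-1] != 0) # nonzero coefficients at lambda.1se (sparser model)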

e) (BONUS)

In this part, we apply fused penalized regression. First, we apply cross validation to select the (lambda1, lambda2) values.

The following objective function is used; the problem is solved via CVXR.

        obj <- sum_squares(train_target - cbind(1,train_feature) %*% beta) / (2 * n) +
          lambda2 * sum_squares(beta[-1])+ # Ridge Penalty
          lambda1 * sum(abs(diff(beta)[-c(1,25)])) # Fused Lasso

Following the glmnet documentation, the sum of squares is divided by 2 * number_of_obs for scaling. Also, the intercept is not included in the ridge penalty, so we use sum_squares(beta[-1]). Lastly, because we have 24 + 24 features for lag48 and lag168, we exclude elements 1 and 25 of diff(beta): element 1 is the difference between the first feature and the intercept, and element 25 is the difference lag168_0 - lag48_23, which crosses from the lag48 block into the lag168 block. The layout is sketched below.
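To make the indexing concrete, here is a commented sketch of the layout of beta and diff(beta), matching the column order produced by dcast:

# beta[1]              : intercept
# beta[2]  .. beta[25] : coefficients of lag48_0  .. lag48_23
# beta[26] .. beta[49] : coefficients of lag168_0 .. lag168_23
# diff(beta)[1]  = beta[2]  - beta[1]   -> first feature minus intercept (excluded)
# diff(beta)[25] = beta[26] - beta[25]  -> lag168_0 minus lag48_23       (excluded)
# All remaining differences are between consecutive hours within the same lag block.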

Now, we apply 5-fold cross validation with lambda1 = lambda2 = 0, 10, 20, ..., 100, evaluated for hours 0, 6, 12 and 18. A more extensive grid might be required, but because of computational cost we proceed with these settings. Cross validation results are compared via MSE.

nfold=5
feature_matrix = dcast(as.data.table(lm_long_data%>%na.omit()),Date~Hour,value.var=c("lag48","lag168"))
train_matrix = feature_matrix%>%filter(Date<test_start)
test_matrix = feature_matrix%>%filter(Date>=test_start)
train_dates = train_matrix$Date
cv_result = tibble()
for(fold in 1:nfold){
  set.seed(fold)
  out_fold_dates = sample(train_dates,length(train_dates)/nfold)
  train_feature = as.matrix(train_matrix%>%filter(!Date%in%out_fold_dates)%>%dplyr::select(-Date))
  test_feature = as.matrix(train_matrix%>%filter(Date%in%out_fold_dates)%>%dplyr::select(-Date))
  
  for(hour in c(0,6,12,18)){
    train_target = (lm_long_data%>%filter(Hour==hour&Date%in%train_dates)%>%filter(!Date%in%out_fold_dates))$Consumption  
    n = nrow(train_feature) # number of observations
    beta <- Variable(ncol(train_feature)+1)
    for(lambda1 in seq(0,100,10)){
        lambda2=lambda1
        obj <- sum_squares(train_target - cbind(1,train_feature) %*% beta) / (2 * n) +
          lambda2 * sum_squares(beta[-1])+ # Ridge Penalty
          lambda1 * sum(abs(diff(beta)[-c(1,25)])) # Fused Lasso
        prob <- Problem(Minimize(obj))
        temp_result <- solve(prob)
        temp_beta = temp_result$getValue(beta)
        temp_pred = cbind(1,test_feature)%*%temp_beta
        temp_result=lm_long_data%>%filter(Hour==hour&Date%in%out_fold_dates)%>%dplyr::select(Date,Hour,Consumption)%>%
          mutate(Prediction=temp_pred,lambda1=lambda1,lambda2=lambda2,fold=fold)
        cv_result = rbind(cv_result,temp_result)
    }
  }
  
}

cv_result_final=cv_result%>%group_by(lambda1,lambda2)%>%summarise(MSE=mean((Consumption-Prediction)^2))%>%arrange(MSE)
cv_result_final %>%
kbl() %>%
  kable_classic(full_width = F, html_font = "Cambria")
lambda1 lambda2 MSE
90 90 2079125
100 100 2079185
80 80 2079371
70 70 2079983
60 60 2081037
50 50 2082633
40 40 2084900
30 30 2088011
20 20 2092204
10 10 2097816
0 0 2105340

According to the results, lambda1 = lambda2 = 90 gives the smallest MSE. We now proceed with these lambda values to generate predictions for each hour.

feature_matrix = dcast(as.data.table(lm_long_data%>%na.omit()),Date~Hour,value.var=c("lag48","lag168"))
train_matrix = feature_matrix%>%filter(Date<test_start)
test_matrix = feature_matrix%>%filter(Date>=test_start)

train_dates = train_matrix$Date
train_feature = as.matrix(train_matrix%>%select(-Date))
test_feature = as.matrix(test_matrix%>%select(-Date))

lambda1=90
lambda2=90
fused_lasso_hourly_final = tibble()
full_summary_fused=tibble()
for(hour in 0:23){
  train_target = (lm_long_data%>%filter(Hour==hour&Date%in%train_dates))$Consumption  
  n = nrow(train_feature) # number of observations
  beta <- Variable(ncol(train_feature)+1)
  obj <- sum_squares(train_target - cbind(1,train_feature) %*% beta) / (2 * n) +
    lambda2 * sum_squares(beta[-1])+ # Ridge Penalty
    lambda1 * sum(abs(diff(beta)[-c(1,25)])) # Fused Lasso
  prob <- Problem(Minimize(obj))
  temp_result <- solve(prob)
  temp_beta = temp_result$getValue(beta)
  temp_pred = cbind(1,test_feature)%*%temp_beta
  temp_result = data.table(Date=test_matrix$Date,Hour=hour,fused_lasso_hourly=temp_pred[,1])
  fused_lasso_hourly_final=rbind(fused_lasso_hourly_final,temp_result)
  full_summary_fused=full_summary_fused%>%
    bind_rows(data.table(variable=c("(Intercept)",colnames(train_feature)), # feature names taken directly from the training matrix
                         coefficent=temp_beta[,1],
                         model=paste0("model_for_hour_",hour)))
}
full_summary_fused=full_summary_fused%>%group_by(model)%>%mutate(coef_order=row_number())%>%ungroup()%>%
  group_by(variable)%>%mutate(model_order=row_number())%>%ungroup()

ggplot(full_summary_fused%>%filter(variable!="(Intercept)"))+geom_tile(aes(x=reorder(variable,coef_order),y=reorder(model,model_order),fill=coefficent))+
  theme_bw()+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

fused_lasso_hourly_final=melt(fused_lasso_hourly_final,1:2,variable.name="Model",value.name="Prediction")%>%left_join(lm_long_data%>%select(Date,Hour,Consumption),by=c("Date","Hour"))

fused_lasso_hourly_mape = fused_lasso_hourly_final%>%group_by(Date,Model)%>%summarise(Mape=mean(abs(Prediction-Consumption)/Consumption))%>%ungroup()

fused_lasso_hourly_mape_general = fused_lasso_hourly_mape%>%group_by(Model)%>%summarise(TestStart=min(Date),TestEnd=max(Date),
                                                                                          NDay=length(unique(Date)),
                                                                                          Min_Mape=min(Mape),Q25_Mape=quantile(Mape,0.25),
                                                                                          Median_Mape=median(Mape),Avg_Mape=mean(Mape),
                                                                                          Q75_Mape=quantile(Mape,0.75),Max_Mape=max(Mape))%>%ungroup()

p1 = ggplot(fused_lasso_hourly_final)+geom_line(aes(x=lubridate::as_datetime(Date)+lubridate::hours(Hour),y=Consumption,col="Actual"))+
  geom_line(aes(x=lubridate::as_datetime(Date)+lubridate::hours(Hour),y=Prediction,col=Model))+
  theme_bw()+labs(x="DateTime")

p2 = ggplot(fused_lasso_hourly_mape)+geom_line(aes(x=Date,y=Mape*100,col=Model))+
  theme_bw()+labs(x="Date",y="MAPE (%)")

gridExtra::grid.arrange(p1,p2,nrow=2)

fused_lasso_hourly_mape_general %>%
kbl() %>%
  kable_classic(full_width = F, html_font = "Cambria")
Model TestStart TestEnd NDay Min_Mape Q25_Mape Median_Mape Avg_Mape Q75_Mape Max_Mape
fused_lasso_hourly 2020-11-01 2020-11-30 30 0.0083398 0.0158916 0.0187858 0.0242732 0.0251494 0.0769423

We observe that:

  • As with the L1-penalized regression, the models of consecutive hours are similar in terms of feature coefficients.
  • With fusing, consecutive features would be expected to take similar coefficients, but this is not visible in our results (see the sketch after this list).
  • The performance is similar to the glmnet performance.
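A sketch to quantify this, using full_summary_fused from above (note the measure also includes the boundary difference from lag48_23 to lag168_0):

# Average absolute difference between consecutive coefficients per hourly model;
# values near zero would indicate strong fusing.
full_summary_fused %>%
  filter(variable != "(Intercept)") %>%
  group_by(model) %>%
  summarise(avg_abs_consecutive_diff = mean(abs(diff(coefficent)))) %>%
  ungroup()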

f)

In this part, we compare all results.

full_comparison = rbind(naive_mape,lm_long_mape,lm_hourly_mape,glmnet_lasso_hourly_mape,fused_lasso_hourly_mape)
full_comparison_general = rbind(naive_mape_general,lm_long_mape_general,lm_hourly_mape_general,glmnet_lasso_hourly_mape_general,fused_lasso_hourly_mape_general)
ggplot(full_comparison)+geom_boxplot(aes(x=Model,y=Mape*100,fill=Model))+labs(y="MAPE (%)")+theme_bw()+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

full_comparison_general %>%
kbl() %>%
  kable_classic(full_width = F, html_font = "Cambria")
Model TestStart TestEnd NDay Min_Mape Q25_Mape Median_Mape Avg_Mape Q75_Mape Max_Mape
naive_lag48 2020-11-01 2020-11-30 30 0.0074837 0.0261611 0.0586450 0.0778920 0.1453798 0.2038553
naive_lag168 2020-11-01 2020-11-30 30 0.0112858 0.0186489 0.0292689 0.0345367 0.0374015 0.1234274
linear_regression_long 2020-11-01 2020-11-30 30 0.0121335 0.0242730 0.0406424 0.0412358 0.0551997 0.0889939
linear_regression_hourly 2020-11-01 2020-11-30 30 0.0095117 0.0252687 0.0443367 0.0424977 0.0541299 0.0850646
glmnet_lasso_hourly_lambdamin 2020-11-01 2020-11-30 30 0.0079057 0.0153607 0.0187656 0.0243023 0.0255123 0.0801177
glmnet_lasso_hourly_lambda1se 2020-11-01 2020-11-30 30 0.0069945 0.0146829 0.0194586 0.0241893 0.0277244 0.0651066
fused_lasso_hourly 2020-11-01 2020-11-30 30 0.0083398 0.0158916 0.0187858 0.0242732 0.0251494 0.0769423
  • The lasso and fused penalized regressions give close results; however, the fused regression's daily performance has a smaller variance (see the spread sketch after this list).
  • Naive lag48 gives the worst results.
  • Linear regression is slightly behind naive lag168 in terms of performance.
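A sketch supporting the variance claim, using the full_comparison table computed above:

# Standard deviation of daily MAPE per model as a simple spread measure
full_comparison %>%
  group_by(Model) %>%
  summarise(SD_Mape = sd(Mape)) %>%
  ungroup() %>%
  arrange(SD_Mape)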